### The following R version was used when preparing the replication files:
# R version 4.3.3 (2024-02-29 ucrt) -- "Angel Food Cake"

# Setting the working directory to the "replication" folder (the folder stores all the data sets used in the article)
setwd("C:/Users/ivan.petrusek/Desktop/unemployment")


##### The following script consists of three parts:
### I. Data management of the individual-level variables
### II. Data management of the contextual-level variables
### III. Centering variables



############################################################
### I. Data management of the individual-level variables ###
############################################################

### Loading the reduced version of the European Social Survey SPSS file
##### loading the SPSS file
library(haven)
ess <- read_sav("ESS1-9e01_1_reduced.sav", user_na = FALSE)
class(ess)
# 422985 respondents and 21 variables
dim(ess)



########## OUTCOME VARIABLE
# reversing scale on the dependent variable "red_sup" (gincdif: 1 = agree strongly, 5 = disagree strongly)
ess$red_sup <- 5 - ess$gincdif
table(ess$red_sup)
table(is.na(ess$red_sup))



########## EXPLANATORY VARIABLES

##### Economic activity
## Coding of the original mnactic variable:
# 1 Paid work
# 2 Education
# 3 Unemployed, looking for job
# 4 Unemployed, not looking for job
# 5 Permanently sick or disabled
# 6 Retired
# 7 Community or military service
# 8 Housework, looking after children, others
# 9 Other

### Creating two dummy variables for the economic activity:
# unemployed
ess$unemployed <- ifelse(ess$mnactic == 3 | ess$mnactic == 4, yes = 1, no = 0) 
table(ess$unemployed)
table(ess$mnactic, ess$unemployed)
table(is.na(ess$unemployed))

# employed
ess$employed <- ifelse(ess$mnactic == 1 | ess$mnactic == 7, yes = 1, no = 0) 
table(ess$mnactic, ess$employed)
table(is.na(ess$employed))


##### Female
## Coding of the original gndr variable:
# 1 Male
# 2 Female
# Creating a dummy variable for female:
ess$female[ess$gndr == 1] <- 0
ess$female[ess$gndr == 2] <- 1
table(ess$female)
table(is.na(ess$female))


##### Age excluding respondents younger than 15 years old and respondents older than 99 years
table(ess$agea)
ess$age <- ifelse(ess$agea > 14 & ess$agea < 100, yes = ess$agea, no = NA)
table(ess$age)
table(is.na(ess$age))


##### Education attainment
## Coding of the original edulvla variable:
# 1 Less than lower secondary education (ISCED 0-1)
# 2 Lower secondary education completed (ISCED 2)
# 3 Upper secondary education completed (ISCED 3)
# 4 Post-secondary non-tertiary education completed (ISCED 4)
# 5 Tertiary education completed (ISCED 5-6)
# 55 Other
table(ess$edulvla)

### Creating four dummy variables for the education attainment
# isced 2 = Lower secondary education completed (ISCED 2)
ess$isced2 <- ifelse(ess$edulvla == 2, yes = 1, no = 0)
table(ess$isced2)
table(is.na(ess$isced2))

# isced 3 = Upper secondary education completed (ISCED 3)
ess$isced3 <- ifelse(ess$edulvla == 3, yes = 1, no = 0)
table(ess$isced3)
table(is.na(ess$isced3))

# isced 4 = Post-secondary non-tertiary education completed (ISCED 4)
ess$isced4 <- ifelse(ess$edulvla == 4, yes = 1, no = 0)
table(ess$isced4)
table(is.na(ess$isced4))

# isced 56 = Tertiary education completed (ISCED 5-6)
ess$isced56 <- ifelse(ess$edulvla == 5, yes = 1, no = 0)
table(ess$isced56)
table(is.na(ess$isced56))


### Subjective evaluation of the household income
## Coding of the original hincfel variable:
# 1 Living comfortably on present income
# 2 Coping on present income
# 3 Difficult on present income
# 4 Very difficult on present income
table(ess$hincfel)

### Creating three dummy variables for the subjective household income variable:
# hinc_2_cop = 2 Coping on present income
ess$hinc_2_cop <- ifelse(ess$hincfel == 2, yes = 1, no = 0)
table(ess$hincfel, ess$hinc_2_cop)

# hinc_3_dif = 3 Difficult on present income
ess$hinc_3_dif <- ifelse(ess$hincfel == 3, yes = 1, no = 0)
table(ess$hincfel, ess$hinc_3_dif)

# hinc_4_vdif = 4 Very difficult on present income
ess$hinc_4_vdif <- ifelse(ess$hincfel == 4, yes = 1, no = 0)
table(ess$hincfel, ess$hinc_4_vdif)



# creating country-round variable
ess$cntry_round <- paste(ess$cntry, ess$essround, "")
table(ess$cntry_round)
length(table(ess$cntry_round)) # 223 country-rounds


##### Checking which ESS rounds are available within countries
table(ess$cntry, ess$essround)

##### Selecting countries with at least FOUR  ESS rounds
### The subset of the countries is stored into the ess_4 data.frame
### 9 or 8 or 7 or 6 or 5 or 4 rounds: 27 countries, 204 country-rounds
# Russia and Ukraine are not included (since the contextual variables are not available for these two countries)
ess_4  <-  ess[ess$cntry == "AT" | ess$cntry == "BE" | ess$cntry == "BG" | ess$cntry == "CY" | 
                 ess$cntry == "CZ" | ess$cntry == "DE" | ess$cntry == "DK" | ess$cntry == "EE" |
                 ess$cntry == "ES" | ess$cntry == "FI" | ess$cntry == "FR" | ess$cntry == "GB" | 
                 ess$cntry == "GR" | ess$cntry == "HU" | ess$cntry == "CH" | ess$cntry == "IE" | 
                 ess$cntry == "IL" | ess$cntry == "IS" | ess$cntry == "IT" | ess$cntry == "LT" | 
                 ess$cntry == "NL" | ess$cntry == "NO" | ess$cntry == "PL" | ess$cntry == "PT" | 
                 ess$cntry == "SE" | ess$cntry == "SI" | ess$cntry == "SK", ]
table(ess_4$cntry)
# 27 countries
length(table(ess_4$cntry))
table(ess_4$cntry, ess_4$essround)
dim(ess_4)

# 204 country-rounds
length(table(ess_4$cntry_round))

# Deleting the original full ESS data.frame
rm(ess)





#############################################################
### II. Data management of the contextual-level variables ###
#############################################################

##### This script loads datasets on four contextual country-level time-varying variables.
##### Then, the script assigns the required contextual information to the respective ESS country-rounds.
##### At the end, the script merges this harmonized information with the individual-level ESS data.

# Creating a function "country_code_2d" for assigning two-digit ISO country code to OECD_data$cntry column 
# (based on three-digit ISO country code from the OECD database)
# this procedure applies only to countries that fielded at least one European Social Survey (ESS) round
# source of country codes:  https://countrycode.org

country_code_2d <- function(OECD_data){
  for(i in 1:dim(OECD_data)[1]){
    if(OECD_data[i,1] == "ALB"){
      OECD_data[i,dim(OECD_data)[2]] <- "AL"
    }else if(OECD_data[i,1] == "AUT"){
      OECD_data[i,dim(OECD_data)[2]] <- "AT"
    }else if(OECD_data[i,1] == "BEL"){
      OECD_data[i,dim(OECD_data)[2]] <- "BE"
    }else if(OECD_data[i,1] == "BGR"){
      OECD_data[i,dim(OECD_data)[2]] <- "BG"
    }else if(OECD_data[i,1] == "HRV"){
      OECD_data[i,dim(OECD_data)[2]] <- "HR"
    }else if(OECD_data[i,1] == "CYP"){
      OECD_data[i,dim(OECD_data)[2]] <- "CY"
    }else if(OECD_data[i,1] == "CZE"){
      OECD_data[i,dim(OECD_data)[2]] <- "CZ"
    }else if(OECD_data[i,1] == "DNK"){
      OECD_data[i,dim(OECD_data)[2]] <- "DK"
    }else if(OECD_data[i,1] == "EST"){
      OECD_data[i,dim(OECD_data)[2]] <- "EE"
    }else if(OECD_data[i,1] == "FIN"){
      OECD_data[i,dim(OECD_data)[2]] <- "FI"
    }else if(OECD_data[i,1] == "FRA"){
      OECD_data[i,dim(OECD_data)[2]] <- "FR"
    }else if(OECD_data[i,1] == "DEU"){
      OECD_data[i,dim(OECD_data)[2]] <- "DE"
    }else if(OECD_data[i,1] == "GRC"){
      OECD_data[i,dim(OECD_data)[2]] <- "GR"
    }else if(OECD_data[i,1] == "HUN"){
      OECD_data[i,dim(OECD_data)[2]] <- "HU"
    }else if(OECD_data[i,1] == "ISL"){
      OECD_data[i,dim(OECD_data)[2]] <- "IS"
    }else if(OECD_data[i,1] == "IRL"){
      OECD_data[i,dim(OECD_data)[2]] <- "IE"
    }else if(OECD_data[i,1] == "ISR"){
      OECD_data[i,dim(OECD_data)[2]] <- "IL"
    }else if(OECD_data[i,1] == "ITA"){
      OECD_data[i,dim(OECD_data)[2]] <- "IT"
    }else if(OECD_data[i,1] == "XKX"){
      OECD_data[i,dim(OECD_data)[2]] <- "XK"
    }else if(OECD_data[i,1] == "LVA"){
      OECD_data[i,dim(OECD_data)[2]] <- "LV"
    }else if(OECD_data[i,1] == "LTU"){
      OECD_data[i,dim(OECD_data)[2]] <- "LT"
    }else if(OECD_data[i,1] == "LUX"){
      OECD_data[i,dim(OECD_data)[2]] <- "LU"
    }else if(OECD_data[i,1] == "NLD"){
      OECD_data[i,dim(OECD_data)[2]] <- "NL"
    }else if(OECD_data[i,1] == "MNE"){
      OECD_data[i,dim(OECD_data)[2]] <- "ME"  
    }else if(OECD_data[i,1] == "NOR"){
      OECD_data[i,dim(OECD_data)[2]] <- "NO"
    }else if(OECD_data[i,1] == "POL"){
      OECD_data[i,dim(OECD_data)[2]] <- "PL"
    }else if(OECD_data[i,1] == "PRT"){
      OECD_data[i,dim(OECD_data)[2]] <- "PT"
    }else if(OECD_data[i,1] == "SRB"){
      OECD_data[i,dim(OECD_data)[2]] <- "RS"
    }else if(OECD_data[i,1] == "ROU"){
      OECD_data[i,dim(OECD_data)[2]] <- "RO"
    }else if(OECD_data[i,1] == "RUS"){
      OECD_data[i,dim(OECD_data)[2]] <- "RU"
    }else if(OECD_data[i,1] == "SVK"){
      OECD_data[i,dim(OECD_data)[2]] <- "SK"
    }else if(OECD_data[i,1] == "SVN"){
      OECD_data[i,dim(OECD_data)[2]] <- "SI"
    }else if(OECD_data[i,1] == "ESP"){
      OECD_data[i,dim(OECD_data)[2]] <- "ES"
    }else if(OECD_data[i,1] == "SWE"){
      OECD_data[i,dim(OECD_data)[2]] <- "SE"
    }else if(OECD_data[i,1] == "CHE"){
      OECD_data[i,dim(OECD_data)[2]] <- "CH"
    }else if(OECD_data[i,1] == "TUR"){
      OECD_data[i,dim(OECD_data)[2]] <- "TR"
    }else if(OECD_data[i,1] == "UKR"){
      OECD_data[i,dim(OECD_data)[2]] <- "UA"
    }else if(OECD_data[i,1] == "GBR"){
      OECD_data[i,dim(OECD_data)[2]] <- "GB"
    }else{
      OECD_data[i,dim(OECD_data)[2]] <- ""
    }
  }
  return(OECD_data)
}



##### Uploading list of ESS country-rounds with a specified KEY QUARTAL (i.e. a quartal, when majority of data per country and ESS round was fielded) and the corresponding KEY YEAR
# key_quartal column refers to the quarter of a year when (the majority of) the fieldwork took place in a particular country round 
# key_quartal column refers to the year when (the majority of) the fieldwork took place in a particular country round 
ESS_data_context <- read.csv("./FIELDWORK_PERIODS.csv", header = TRUE, sep = ";", dec = ".")



### 1.) HARMONIZED UNEMPLOYMENT RATE
# Reading the quarterly Harmonized unemployment rate from OECD database
# The downloaded OECD data was downloaded to contain quarterly data from 2000-Q1 to 2021-Q1 (i.e. data for 85 quarters)
OECD_HUR_data <- read.csv("./DP_LIVE_25062021174702091.csv", header = TRUE, sep = ",", dec = ".")
dim(OECD_HUR_data)

# Assigning the two-digit country code
OECD_HUR_data$cntry <- rep(NA)
OECD_HUR_data <- country_code_2d(OECD_HUR_data)

# Subsetting only OECD countries that are part of the ESS
OECD_HUR_data_ess <- OECD_HUR_data[OECD_HUR_data$cntry != "", ]
rm(OECD_HUR_data)

table(OECD_HUR_data_ess$INDICATOR)
table(OECD_HUR_data_ess$FREQUENCY)
# 28 ESS countries available
length(table(OECD_HUR_data_ess$cntry))


##### ANALYSIS OF MISSING DATA
# Greece (GR) is missing one quarter (2021-Q1) - NOT A PROBLEM
# Switzerland (CH) has a broken time series (between 2000 and 2009, only data for second quarters [Q2] is available) - this constitutes a problem
# Iceland (IS) has only data available since 2003-Q1 (i.e. 12 quarters are missing) - NOT A PROBLEM (ESS data for round two was collected during 2005-Q3)
# Norway (NO) is missing two quarters (2020-Q4 and 2021-Q1) - NOT A PROBLEM
# Turkey (TR) has only data available since 2005-Q1 and 2020-Q1 is missing (i.e. 21 quarters are missing) - NOT A PROBLEM (Turkey only fielded two ESS rounds and is not part of the analysis)
tapply(OECD_HUR_data_ess$TIME, OECD_HUR_data_ess$cntry, length)

### FILLING "ESS_data_context" table with HUR data
# Creating an empty variable for HUR during the country-year fieldwork quarter (i.e. quarter when most of the individual-level data was collected)
ESS_data_context$HUR_q0 <- rep(NA, nrow(ESS_data_context))

# Creating an empty variable for HUR during the previous quarter to the country-year fieldwork quarter (i.e. t-1)
ESS_data_context$HUR_qm1 <- rep(NA, nrow(ESS_data_context))

# The assignment of HUR 
for(i in 1:dim(ESS_data_context)[1]){
  if(dim(table(OECD_HUR_data_ess$cntry == as.character(ESS_data_context$cntry[i]) & OECD_HUR_data_ess$TIME == as.character(ESS_data_context$key_quartal[i]))) == 2){
    x <- which(OECD_HUR_data_ess$cntry == as.character(ESS_data_context$cntry[i]) & OECD_HUR_data_ess$TIME == as.character(ESS_data_context$key_quartal[i]))
    ESS_data_context[i, c("HUR_qm1", "HUR_q0")] <- OECD_HUR_data_ess$Value[(x-1):x]
  }
}

# checking how many country-year combinations are missing (33)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]

# displaying which countries AND key quarters have missing contextual variables
ESS_data_context[which(is.na(ESS_data_context[,dim(ESS_data_context)[2]])), 3:4]

##### adjustments of assignments based on the above analysis of missing data
### Switzerland (CH) - data are filled in based on available data from Q2 (from the second quarter)
ESS_data_context$HUR_q0[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2008-Q4"] <- (OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2008-Q2"] + OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2009-Q2"])/2
ESS_data_context$HUR_qm1[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2008-Q4"] <- OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2008-Q2"]

ESS_data_context$HUR_q0[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2006-Q4"] <- (OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2006-Q2"] + OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2007-Q2"])/2
ESS_data_context$HUR_qm1[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2006-Q4"] <- OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2006-Q2"]

ESS_data_context$HUR_q0[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2004-Q4"] <- (OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2004-Q2"] + OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2005-Q2"])/2
ESS_data_context$HUR_qm1[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2004-Q4"] <- OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2004-Q2"]

ESS_data_context$HUR_q0[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2002-Q4"] <- (OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2002-Q2"] + OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2003-Q2"])/2
ESS_data_context$HUR_qm1[ESS_data_context$cntry == "CH" & ESS_data_context$key_quartal == "2002-Q4"] <- OECD_HUR_data_ess$Value[OECD_HUR_data_ess$cntry == "CH" & OECD_HUR_data_ess$TIME == "2002-Q2"]

##### Harmonised unemployment rate data for Bulgaria (BG) and Cyprus (CY) were obtained from Eurostat: online data code = TIPSUN30
HUR_BG_CY <- read.csv("./TIPSUN30_BG_CY_2000_2020.csv", header = TRUE, sep = ";", dec = ".")

### BG
# Bulgaria: essround 3, 2006-Q4
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$essround == 3, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[1, c("X2006.Q3", "X2006.Q4")]

# Bulgaria: essround 4, 2009-Q1
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$essround == 4, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[1, c("X2008.Q4", "X2009.Q1")]

# Bulgaria: essround 5, 2011-Q1
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$essround == 5, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[1, c("X2010.Q4", "X2011.Q1")]

# Bulgaria: essround 6, 2013-Q1
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$essround == 6, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[1, c("X2012.Q4", "X2013.Q1")]

# Bulgaria: essround 9, 2018-Q4
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$essround == 9, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[1, c("X2018.Q3", "X2018.Q4")]


### CY
# Cyprus: essround 3, 2006-Q4
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$essround == 3, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[2, c("X2006.Q3", "X2006.Q4")]

# Cyprus: essround 4, 2008-Q4
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$essround == 4, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[2, c("X2008.Q3", "X2008.Q4")]

# Cyprus: essround 5, 2011-Q1
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$essround == 5, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[2, c("X2010.Q4", "X2011.Q1")]

# Cyprus: essround 6, 2012-Q4
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$essround == 6, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[2, c("X2012.Q3", "X2012.Q4")]

# Cyprus: essround 9, 2018-Q4
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$essround == 9, c("HUR_qm1", "HUR_q0")] <- HUR_BG_CY[2, c("X2018.Q3", "X2018.Q4")]

# checking how many country-year combinations are missing now: 19 (33 - 4 - 5 - 5 = 19)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]



### 2.) PUBLIC SOCIAL EXPENDITURE (as % of GDP)
# Reading the yearly public social expenditure from OECD database
# The OECD data was downloaded to contain yearly data from 2000 to 2019 (i.e. data for 20 years)
# Source: https://data.oecd.org/socialexp/social-spending.htm
OECD_SOC_EXP_data <- read.csv("./DP_LIVE_14072021180102531.csv", header = TRUE, sep = ",", dec = ".")
dim(OECD_SOC_EXP_data)

# Assigning the two-digit country code
OECD_SOC_EXP_data$cntry <- rep(NA)
OECD_SOC_EXP_data <- country_code_2d(OECD_SOC_EXP_data)

# Subsetting only OECD countries that are part of the ESS
OECD_SOC_EXP_data_ess <- OECD_SOC_EXP_data[OECD_SOC_EXP_data$cntry != "", ]
rm(OECD_SOC_EXP_data)

table(OECD_SOC_EXP_data_ess$INDICATOR)
table(OECD_SOC_EXP_data_ess$FREQUENCY)
# 28 ESS countries available
length(table(OECD_SOC_EXP_data_ess$cntry))


##### ANALYSIS OF MISSING DATA
# Switzerland (CH) is missig the data for 2019 - NOT A PROBLEM (as ESS round 9 was gathered during 2018-Q4 in Switzerland)
tapply(OECD_SOC_EXP_data_ess$TIME, OECD_SOC_EXP_data_ess$cntry, length)
tapply(OECD_SOC_EXP_data_ess$TIME, OECD_SOC_EXP_data_ess$cntry, range)

### FILLING "ESS_data_context" table with SOCEXP data
# Creating an empty variable for SOCEXP during the country-year fieldwork year
ESS_data_context$SOCEXP_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for SOCEXP during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$SOCEXP_ym1 <- rep(NA, nrow(ESS_data_context))

# The assignment of SOCEXP
for(i in 1:dim(ESS_data_context)[1]){
  if(dim(table(OECD_SOC_EXP_data_ess$cntry == as.character(ESS_data_context$cntry[i]))) == 2 & table(OECD_SOC_EXP_data_ess$cntry == as.character(ESS_data_context$cntry[i]) & OECD_SOC_EXP_data_ess$TIME == as.character(ESS_data_context$key_year[i]))[2] == 1){
    x <- which(OECD_SOC_EXP_data_ess$cntry == as.character(ESS_data_context$cntry[i]) & OECD_SOC_EXP_data_ess$TIME == as.character(ESS_data_context$key_year[i]))
    ESS_data_context[i, c("SOCEXP_ym1", "SOCEXP_y0")] <- OECD_SOC_EXP_data_ess$Value[(x-1):x]}
}

# checking how many country-year combinations are missing (29)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]

# displaying which countries AND key quartals have missing contextual variable
# Bulgaria (BG) and Cyprus (CY) have missing data 
ESS_data_context[which(is.na(ESS_data_context[,dim(ESS_data_context)[2]])), 3:4]

# Eurostat data for Bulgaria (BG) and Cyprus (CY): online data code = SPR_EXP_SUM
# Source: https://ec.europa.eu/eurostat/databrowser/view/SPR_EXP_SUM__custom_1146171/default/table?lang=en
# Percentage of gross domestic product (GDP),	Social protection benefits,	Annual
SOCEXP_BG_CY <- read.csv("./SPR_EXP_SUM__BG_CY_2000_2018.csv", header = TRUE, sep = ";", dec = ".")

### BG
# Bulgaria: essround 3, 2006
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$key_year == 2006, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[1, c("X2006", "X2005")]

# Bulgaria: essround 4, 2009
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$key_year == 2009, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[1, c("X2009", "X2008")]

# Bulgaria: essround 5, 2011
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$key_year == 2011, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[1, c("X2011", "X2010")]

# Bulgaria: essround 6, 2013
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$key_year == 2013, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[1, c("X2013", "X2012")]

# Bulgaria: essround 9, 2018
ESS_data_context[ESS_data_context$cntry == "BG" & ESS_data_context$key_year == 2018, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[1, c("X2018", "X2017")]


### CY
# Cyprus: essround 3, 2006
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$key_year == 2006, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[2, c("X2006", "X2005")]

# Cyprus: essround 4, 2008
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$key_year == 2008, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[2, c("X2008", "X2007")]

# Cyprus: essround 5, 2011
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$key_year == 2011, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[2, c("X2011", "X2010")]

# Cyprus: essround 6, 2012
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$key_year == 2012, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[2, c("X2012", "X2011")]

# Cyprus: essround 9, 2018
ESS_data_context[ESS_data_context$cntry == "CY" & ESS_data_context$key_year == 2018, c("SOCEXP_y0", "SOCEXP_ym1")] <- SOCEXP_BG_CY[2, c("X2018", "X2017")]

# checking how many country-year combinations are missing now: 19 (29 - 5 - 5 = 19)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]



### 3.) INCOME INEQUALITY (Gini coefficient of disposable income)
# Source: SWIID database, Version 9.3: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/LM4OWF

load("./swiid9_3.rda")

class(swiid_summary)
dim(swiid_summary) ### 5910 by 12
colnames(swiid_summary)

# adding an empty column "cntry" as a placeholder for ESS country variable
swiid_summary$cntry <- as.character(rep(NA))

# assigning two-digit ISO country code to OECD_data$cntry column
for(i in 1:dim(swiid_summary)[1]){
  if(swiid_summary[i,1] == "Albania"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "AL"
  }else if(swiid_summary[i,1] == "Austria"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "AT"
  }else if(swiid_summary[i,1] == "Belgium"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "BE"
  }else if(swiid_summary[i,1] == "Bulgaria"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "BG"
  }else if(swiid_summary[i,1] == "Croatia"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "HR"
  }else if(swiid_summary[i,1] == "Cyprus"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "CY"
  }else if(swiid_summary[i,1] == "Czech Republic"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "CZ"
  }else if(swiid_summary[i,1] == "Denmark"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "DK"
  }else if(swiid_summary[i,1] == "Estonia"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "EE"
  }else if(swiid_summary[i,1] == "Finland"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "FI"
  }else if(swiid_summary[i,1] == "France"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "FR"
  }else if(swiid_summary[i,1] == "Germany"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "DE"
  }else if(swiid_summary[i,1] == "Greece"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "GR"
  }else if(swiid_summary[i,1] == "Hungary"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "HU"
  }else if(swiid_summary[i,1] == "Iceland"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "IS"
  }else if(swiid_summary[i,1] == "Ireland"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "IE"
  }else if(swiid_summary[i,1] == "Israel"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "IL"
  }else if(swiid_summary[i,1] == "Italy"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "IT"
  }else if(swiid_summary[i,1] == "Kosovo"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "XK"
  }else if(swiid_summary[i,1] == "Latvia"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "LV"
  }else if(swiid_summary[i,1] == "Lithuania"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "LT"
  }else if(swiid_summary[i,1] == "Luxembourg"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "LU"
  }else if(swiid_summary[i,1] == "Netherlands"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "NL"
  }else if(swiid_summary[i,1] == "Norway"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "NO"
  }else if(swiid_summary[i,1] == "Poland"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "PL"
  }else if(swiid_summary[i,1] == "Portugal"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "PT"
  }else if(swiid_summary[i,1] == "Romania"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "RO"
  }else if(swiid_summary[i,1] == "Russia"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "RU"
  }else if(swiid_summary[i,1] == "Slovakia"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "SK"
  }else if(swiid_summary[i,1] == "Slovenia"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "SI"
  }else if(swiid_summary[i,1] == "Spain"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "ES"
  }else if(swiid_summary[i,1] == "Sweden"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "SE"
  }else if(swiid_summary[i,1] == "Switzerland"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "CH"
  }else if(swiid_summary[i,1] == "Turkey"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "TR"
  }else if(swiid_summary[i,1] == "Ukraine"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "UA"
  }else if(swiid_summary[i,1] == "United Kingdom"){
    swiid_summary[i,dim(swiid_summary)[2]] <- "GB"
  }else{
    swiid_summary[i,dim(swiid_summary)[2]] <- ""
  }
}

# checking the assignment of country codes
table(swiid_summary$cntry)
length(table(swiid_summary$cntry))
table(swiid_summary$country, swiid_summary$cntry)

# Subsetting only those 36 countries that are part of ESS
swiid_ess <- swiid_summary[swiid_summary$cntry != "", ]
dim(swiid_ess)

##### ANALYSIS OF MISSING DATA
# Ideally, we need data covering period from 2000 to 2018-2019.
# This is the case for all analysed countries except for Iceland (IS).
# The missing values for Iceland 2019 will be imputed based on the latest available value (i.e. 2017)
tapply(swiid_ess$year, swiid_ess$cntry, range)
tapply(swiid_ess$year, swiid_ess$cntry, length)
tapply(swiid_ess$year, swiid_ess$cntry, min)
tapply(swiid_ess$year, swiid_ess$cntry, max)

### checking whether each ESS country has a yearly data; for each country: [(maximum-minimum)+1)/length]
((tapply(swiid_ess$year, swiid_ess$cntry, max)-tapply(swiid_ess$year, swiid_ess$cntry, min))+1)/tapply(swiid_ess$year, swiid_ess$cntry, length)


########## gini_disp: Estimate of Gini index of inequality in equivalized (square root scale) household DISPOSABLE (post-tax, post-transfer) income
# Creating an empty variable for Gini index during the country-year fieldwork year
ESS_data_context$gini_disp_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for Gini index during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$gini_disp_ym1 <- rep(NA, nrow(ESS_data_context))

### FILLING "ESS_data_context" table with SWIID gini_disp data
for(i in 1:dim(ESS_data_context)[1]){
  if(!is.na(table(swiid_ess$cntry == as.character(ESS_data_context$cntry[i]) & swiid_ess$year == as.character(ESS_data_context$key_year[i]))[2]) == TRUE){
    if(dim(table(swiid_ess$cntry == as.character(ESS_data_context$cntry[i]))) == 2 & table(swiid_ess$cntry == as.character(ESS_data_context$cntry[i]) & swiid_ess$year == as.character(ESS_data_context$key_year[i]))[2] == 1){
      x <- which(swiid_ess$cntry == as.character(ESS_data_context$cntry[i]) & swiid_ess$year == as.character(ESS_data_context$key_year[i]))
      ESS_data_context[i, c("gini_disp_ym1", "gini_disp_y0")] <- swiid_ess$gini_disp[(x-1):x]}}
}

# checking how many country-year combinations are missing (3)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]

# Iceland (IS) 2019 is missing
# Assigning the latest availale Gini coeficient (i.e. from 2017) to Iceland 2019
ESS_data_context[ESS_data_context$cntry == "IS" & ESS_data_context$key_year == 2019, c("gini_disp_y0")] <- swiid_ess$gini_disp[swiid_ess$cntry == "IS" & swiid_ess$year == 2017]
ESS_data_context[ESS_data_context$cntry == "IS" & ESS_data_context$key_year == 2019, c("gini_disp_ym1")] <- swiid_ess$gini_disp[swiid_ess$cntry == "IS" & swiid_ess$year == 2017]

# displaying which countries AND key quartals have missing contextual variables (only two country-years that will not be part of the analysis are missing)
ESS_data_context[which(is.na(ESS_data_context[,dim(ESS_data_context)[2]])), 3:4]

# checking how many country-year combinations are missing (after the correction, there are only 2)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]
# remowing the swiid list
rm(swiid)



### 4.) GROSS DOMESTIC PRODUCT - used as a control variable only in some models
# Supplementary material: TABLE A5. Hierarchical linear models of redistribution support: including GDP per capita (in USD).
# Reading the gross domestic product from OECD database
# The OECD data was downloaded to contain yearly data from 2000 to 2021
OECD_GDP_data <- read.csv("./DP_LIVE_12072022131229628.csv", header = TRUE, sep = ",", dec = ".")
dim(OECD_GDP_data)

# Assigning the two-digit country code
OECD_GDP_data$cntry <- rep(NA)
OECD_GDP_data <- country_code_2d(OECD_GDP_data)

# Subsetting only OECD countries that are part of the ESS
OECD_GDP_data_ess <- OECD_GDP_data[OECD_GDP_data$cntry != "", ]
rm(OECD_GDP_data)

table(OECD_GDP_data_ess$INDICATOR)
table(OECD_GDP_data_ess$FREQUENCY)
# 35 ESS countries available
length(table(OECD_GDP_data_ess$cntry))


##### ANALYSIS OF MISSING DATA
# Only three countries (AL, RS, RU), which are not part of our analyses, have the time series ending is 2020
tapply(OECD_GDP_data_ess$TIME, OECD_GDP_data_ess$cntry, length)
tapply(OECD_GDP_data_ess$TIME, OECD_GDP_data_ess$cntry, range)

### FILLING "ESS_data_context" table with GDP data
# Creating an empty variable for GDP during the country-year fieldwork year
ESS_data_context$GDP_y0 <- rep(NA, nrow(ESS_data_context))
# Creating an empty variable for GDP during the year previous to the country-year fieldwork year (i.e. t-1)
ESS_data_context$GDP_ym1 <- rep(NA, nrow(ESS_data_context))

# The assignment of GDP (and simultaneously transforming it GDP in 1000s USD)
for(i in 1:dim(ESS_data_context)[1]){
  if(dim(table(OECD_GDP_data_ess$cntry == as.character(ESS_data_context$cntry[i]))) == 2 & table(OECD_GDP_data_ess$cntry == as.character(ESS_data_context$cntry[i]) & OECD_GDP_data_ess$TIME == as.character(ESS_data_context$key_year[i]))[2] == 1){
    x <- which(OECD_GDP_data_ess$cntry == as.character(ESS_data_context$cntry[i]) & OECD_GDP_data_ess$TIME == as.character(ESS_data_context$key_year[i]))
    ESS_data_context[i, c("GDP_ym1", "GDP_y0")] <- OECD_GDP_data_ess$Value[(x-1):x]/1000}
}

# checking how many country-year combinations are missing (7)
table(is.na(ESS_data_context[,dim(ESS_data_context)[2]]))[2]

# displaying which countries AND key quartals have missing GDP per capita
# since none of these countries are part of our analyses, it is not a problem that these 7 country-years are missing
ESS_data_context[which(is.na(ESS_data_context[,dim(ESS_data_context)[2]])), 3:4]



### deleting leading contextual variables (i.e. contextual variables measured at time t-1)
# (since we are only interested in the effects of contextual variables at the time of the respective fieldwork period)
library(data.table)
contextual_variables <- ESS_data_context

setDT(contextual_variables)
contextual_variables[, c("HUR_qm1", "SOCEXP_ym1", "GDP_ym1", "gini_disp_ym1") := NULL]



##### Merging three contextual variables with the individual-level ess_4 data
setDT(ess_4)
dim(ess_4) # 34 variables
# Creating a new ess_context datasat, which combines individual-level data with contextual-level data
ess_context <- contextual_variables[ess_4, on = .(cntry, essround)]
dim(ess_context) # 41 variables

# All four contextual variables are available for all country rounds - there are no missing values
table(is.na(ess_context$HUR_q0))
table(is.na(ess_context$SOCEXP_y0))
table(is.na(ess_context$GDP_y0))
table(is.na(ess_context$gini_disp_y0))





################################
### III. Centering variables ###
################################

##### The following script performs the following three crucial operations:
## 1. Subsets respondents who have observed values on all individual-level variables (i.e. listwise deletes the ESS data)
## 2. Centers economic activity within country-years/country-rounds
## 3. Decomposes the effects of the four contextual-level variables into cross-sectional effect (BE) and longitudinal effect (WE)


########## 1. MISSING DATA: listwise deletion
### Subsetting only respondents who have valid values on all individual-level variables entering the subsequent substantive analyses
# contextual variables are entered here in their original format (i.e. they are not centered within country-years)
ess_context$compl <- !is.na(ess_context$red_sup) & !is.na(ess_context$hinc_2_cop) & !is.na(ess_context$hinc_3_dif) & !is.na(ess_context$hinc_4_vdif) & 
  !is.na(ess_context$employed) & !is.na(ess_context$unemployed) & !is.na(ess_context$female) & !is.na(ess_context$age) & 
  !is.na(ess_context$isced2) & !is.na(ess_context$isced3) & !is.na(ess_context$isced4) & 
  !is.na(ess_context$isced56) & !is.na(ess_context$lrscale)

# creating a "listwise deletion" (_ld) data frame that contains only cases that have observed values on all variables
ess_4_ld <- ess_context[ess_context$compl == TRUE, ]
# After excluding the missing data at the individual level, the data set contains 319752 respondents. These respondents enter all the analyses reported in the article.
dim(ess_4_ld)
setDT(ess_4_ld)

# how many country-rounds are available (202)
length(table(ess_4_ld$cntry_round))

### France round 1 and France round 2 were excluded (because the subjective household income variable is not available in these two rounds)
table(ess_4_ld$cntry, ess_4_ld$essround)

# All contextual variables are available for all country rounds
table(is.na(ess_4_ld$HUR_q0))
table(is.na(ess_4_ld$SOCEXP_y0))
table(is.na(ess_4_ld$GDP_y0))
table(is.na(ess_4_ld$gini_disp_y0))



########## 2. Centering individual-level predictors: being unemployed and being employed
### PROGRAMMING TWO FUNCTIONS FOR GROUP-MEAN CENTERING (at the individual level)
########## ##### GROUP-MEAN CENTERING FUNCTIONS
center <- function(x) {
  if(!is.numeric(x)) {
    return(x) # if not numeric return the variable
  } else { # if numeric center
    x_mean <- mean(x, na.rm = TRUE)
    x_center <- x - x_mean
    return(x_center)
  }
}

center_df <- function(df, col_center, group) {
  require(data.table)
  df <- as.data.table(df)
  
  newcols <- paste0(col_center, "_CWC")
  
  df_center <- df[, c(newcols) := lapply(.SD, center),
                  by = eval(group), 
                  .SDcols = col_center]
  
  return(as.data.frame(df_center))
}
##### GROUP-MEAN CENTERING FUNCTIONS


##### Group-mean centering (CWC) the selected individual level predictor variables (at the country-round level)
dim(ess_4_ld)
ess_4_ld <- center_df(df = ess_4_ld, col_center = c("employed", "unemployed"), group = "cntry_round")



########## 3. Decomposing the effects of contextual-level variables into BEs and WEs
##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
##### CONTEXTUAL VARIABLES (202 country-rounds) ##### ##### ##### ##### #####
##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### ##### #####
# this script checks how many unique values there are - maximum number CANNOT exceed the number of country rounds (e.g. 202 country-rounds)
context_names <- c("HUR_q0", "SOCEXP_y0", "GDP_y0", "gini_disp_y0")
for(i in 1:length(context_names)){
  unique_cr <- uniqueN(ess_4_ld, by = context_names[i])
  print(unique_cr)
}


# creating a matrix with contextual variables exported from the "ess_4_ld"
setDT(ess_4_ld)
ess_4_context <- ess_4_ld[, .(HUR_q0 = mean(HUR_q0), SOCEXP_y0 = mean(SOCEXP_y0), GDP_y0 = mean(GDP_y0), gini_disp_y0 = mean(gini_disp_y0)), by = c("cntry", "essround")]
for(i in 1:length(context_names)){
  unique_cr <- uniqueN(ess_4_context, by = context_names[i])
  print(unique_cr)
}


# computing the means of contextual variables within countries (to estimate the BE of contextual variables)
# these means are computed based on the above aggregated "ess_4_context" matrix
ess_4_context_means <- ess_4_context[, .(mean_HUR_q0 = mean(HUR_q0), mean_SOCEXP_y0 = mean(SOCEXP_y0), mean_GDP_y0 = mean(GDP_y0), 
                                         mean_gini_disp_y0 = mean(gini_disp_y0)), by = "cntry"]
# correlation matrix between contextual variables at the country level
cor(ess_4_context_means[, 2:5])

# merging the country means with the original contextual dataset (so that contextual variables could be centered within country-years)
ess_4_context <- ess_4_context_means[ess_4_context, on = "cntry"]


### Centering contextual variables within clusters (CWC)
ess_4_context$dev_mean_HUR_q0 <- ess_4_context$HUR_q0 - ess_4_context$mean_HUR_q0
ess_4_context$dev_mean_SOCEXP_y0 <- ess_4_context$SOCEXP_y0 - ess_4_context$mean_SOCEXP_y0
ess_4_context$dev_mean_GDP_y0 <- ess_4_context$GDP_y0 - ess_4_context$mean_GDP_y0
ess_4_context$dev_mean_gini_disp_y0 <- ess_4_context$gini_disp_y0 - ess_4_context$mean_gini_disp_y0

# Deleting the four variables that are already in ess_4_ld data. frame
ess_4_context[, c("HUR_q0", "SOCEXP_y0", "GDP_y0", "gini_disp_y0") := NULL]

# Computing correlations between respective pairs of variables (to check whether the BE and WE is orthogonal)
cor(ess_4_context$dev_mean_HUR_q0, ess_4_context$mean_HUR_q0)
cor(ess_4_context$dev_mean_SOCEXP_y0, ess_4_context$mean_SOCEXP_y0)
cor(ess_4_context$dev_mean_GDP_y0, ess_4_context$mean_GDP_y0)
cor(ess_4_context$dev_mean_gini_disp_y0, ess_4_context$mean_gini_disp_y0)



### Merging the decomposed contextual-level variables with the already prepared individual-level ess_4_ld data.frame
# Multilevel models will be computed on the resulting data.frame "ess_4_ld_models"
ess_4_ld_models <- ess_4_context[ess_4_ld, on = .(cntry, essround)]

# Deleting the "listwise deletion" (_ld) - since it does not have the prepared contextual variables and is therefore just a duplicate
rm(ess_4_ld)
rm(context_names, i, unique_cr, x)
